#### load libraries ----
library("tidyverse")
library("pander")
#library("UnlikelyTools")
library("stargazer")
library("texreg")
library("mvtnorm")
library("MASS")
library("pBrackets")
#library("NRejections")

#### functions -----
## create function to produce plots of treatment effects
barplot.func <- function(x=dta$BasicloutMainAnalysis,
                         mntxt="Effects, All Voters",
                         baseline=0.13,
                         boost=0.02,
                         boost2=0,
                         boost3=0,
                         adjustment=0,p.vec){
  
  M <- 10000
  betas <- mvrnorm(M,mu=x$coefficients[,1],Sigma=vcov(x))
  xpg <- xg <- xc <- rep(0,dim(betas)[2])
  
  xpg[1] <- xg[1] <- xc[1] <- 1
  xg[2] <- 1
  xpg[4] <- 1
  
  pred.xpg <- (xpg)%*%t(betas)+adjustment
  pred.xg <- xg%*%t(betas)+adjustment
  pred.xc <- xc%*%t(betas)+adjustment
  
  mn <- quantile(c(pred.xpg,pred.xg,pred.xc),.01)
  mx <- quantile(c(pred.xpg,pred.xg,pred.xc),.99)+.10
  #####
  
  bp <- barplot(c(x$coefficients[1,1]+adjustment,
                  x$coefficients[1,1]+x$coefficients[2,1]+adjustment,
                  x$coefficients[1,1]+x$coefficients[4,1]+adjustment),
                names.arg=c("Control","General","Primary & General"),
                ylim=c(0,mx),
                main=mntxt)
  
  lines(x=c(bp[1,1],bp[1,1]),
        y=c(quantile(pred.xc,.025),quantile(pred.xc,.975)),
        lwd=2)
  
  lines(x=c(bp[2,1],bp[2,1]),
        y=c(quantile(pred.xg,.025),quantile(pred.xg,.975)),
        lwd=2)
  
  lines(x=c(bp[3,1],bp[3,1]),
        y=c(quantile(pred.xpg,.025),quantile(pred.xpg,.975)),
        lwd=2)
  
  text(x=bp[1,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+adjustment,digits=3),sep="")
  )
  text(x=bp[2,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+x$coefficients[2,1]+adjustment,digits=3),
             sep="")
  )
  text(x=bp[3,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+x$coefficients[4,1]+adjustment,digits=3),
             sep="")
  )
  
  brackets(x1=bp[1,1],x2=bp[2,1],y1=(mx-.075-boost3),y2=(mx-0.075-boost3),h=0.01)
  text(x=mean(c(bp[1,1],bp[2,1])),y=mx-.055,paste("Treatment=",
                                                  round(x$coefficients[2,1],digits=3),
                                                  ", Adj. P=",round(p.vec[1],digits=2),
                                                  sep=" "))
  
  brackets(x1=bp[1,1],x2=bp[3,1],y1=(mx-.035),y2=(mx-0.035),h=0.01)
  text(x=mean(c(bp[1,1],bp[3,1])),y=mx-0.015,paste("Treatment=",
                                                   round(x$coefficients[4,1],digits=3),
                                                   ", Adj. P=",round(p.vec[2],digits=2),
                                                   sep=" "))
  
}



#### set working directory to your location of replication folder----
setwd("C:/Users/torig/Downloads/voter turnout replication")
#setwd("/users/danhop/Dropbox/pbsi/voter turnout/09_Code/voter turnout replication/")
pacman::p_load("pander", "tm")

#### load data and identify treatment arms -----
load(file = "outcome_vf_census_ANON_03082022.RData")
dta.clean <- dta.clean %>% 
    mutate(
       control_group = ifelse(treat == "Control" | treat == "Extra", 1, 0),
        primary_mailer = ifelse(grepl("Primary", treat, fixed = TRUE), 1, 0),
        neighbor_mailer = ifelse(grepl("Neighbor", treat, fixed = TRUE), 1, 0),
        treat_amount = case_when(
            treat %in% c("Control", "Extra") ~ "Control",
            str_detect(treat, "General") ~ "Treat: General",
            str_detect(treat, "Primary") ~ "Treat: Primary"),
          treat_framing = case_when(
            treat %in% c("Control", "Extra") ~ "Control",
            str_detect(treat, "City") ~ "Treat: City",
            str_detect(treat, "Neighbor") ~ "Treat: Neighbor")
      )
list_of_lm <- list()
list_of_lm_primary <- list()

# all people who received a treatment
dta.clean$treatment <- 0
dta.clean$treatment[dta.clean$control_group != 1] <- 1

###### recode variables ----
dta.clean$treat.neighbor <- 0 #H2a
dta.clean$treat.neighbor[dta.clean$treat == "General | Neighbor"] <- 1
dta.clean$treat.neighbor[dta.clean$treat == "Primary | Neighbor"] <- 1
dta.clean$treat.city <- 0 #H2b
dta.clean$treat.city[dta.clean$treat == "General | City"] <- 1
dta.clean$treat.city[dta.clean$treat == "Primary | City"] <- 1
dta.clean$treat.primary.general <- 0 #H2c
dta.clean$treat.primary.general[dta.clean$treat == "Primary | Neighbor"] <- 1
dta.clean$treat.primary.general[dta.clean$treat == "Primary | City"] <- 1
dta.clean$treat.general <- 0 #H2d
dta.clean$treat.general[dta.clean$treat == "General | Neighbor"] <- 1
dta.clean$treat.general[dta.clean$treat == "General | City"] <- 1

dta.clean$control <- 0
dta.clean$control[dta.clean$treat %in% c("Control","Extra")] <- 1

# The Combined lm ----
# Appendix A.3 - Table 1
loutH2 <- lm(turnout_general ~ treat.general + treat.primary.general + treat.neighbor + as.factor(sex) +
               as.factor(party) + as.factor(ward) + 
               as.factor(vote_history) + as.factor(age_bin), data = dta.clean)
summary(loutH2)
texreg(loutH2,digits=4,stars=0.05,single.row=T,longtable=T,dcolumn=T)

list_of_lm[["loutMainAnalysis"]] <- summary(loutH2)
rm(loutH2)

### simulate to estimate p-value for treatment comparison
### set number of simulations
M <- 500
rmat <- matrix(NA,M,2)
def <- 1-sum(c(c(mean(dta.clean$treat.general),
                 mean(dta.clean$treat.primary.general))))
for(i in 1:M){
  var <- t(rmultinom(dim(dta.clean)[1],size=1,prob=c(def,mean(dta.clean$treat.general),
                                                 mean(dta.clean$treat.primary.general))))
  dta.clean$placebo1 <- (var[,2]==1)*1 
  dta.clean$placebo2 <-   (var[,3]==1)*1 
  
  lout.hold <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                    as.factor(party) + as.factor(ward) + 
                    as.factor(vote_history) + as.factor(age_bin), data = dta.clean)
  rmat[i,1] <- summary(lout.hold)$coef[2,4]
  rmat[i,2] <- summary(lout.hold)$coef[3,4]
}
rmat10 <- rmat

#### estimate corrected standard errors
adj_Wstep(summary(loutH2)$coef[c(2,3),4],t(rmat10))


#### proportions test (in text)---- **** remove ones not in paper
s1 <- l1 <- c()

# Number of people who turned out among control group
s1[1] <- sum(dta.clean$turnout_general[dta.clean$treat.general==0 & dta.clean$treat.primary.general==0]==T,na.rm=T)
# Total number of people in control group
l1[1] <- sum(dta.clean$treat.general==0 & dta.clean$treat.primary.general==0 & ! dta.clean$turnout_general %in% c(NA))

# Number of people who turned out among General only postcard treated group
s1[2] <- sum(dta.clean$turnout_general[dta.clean$treat.general==1]==T,na.rm=T)
# Total number of people in General treated group
l1[2] <- sum(dta.clean$treat.general==1 & ! dta.clean$turnout_general %in% c(NA))

# Number of people who turned out among Primary and General postcard treated group
s1[3]<- sum(dta.clean$turnout_general[dta.clean$treat.primary.general==1]==T,na.rm=T)
# Total number of people in Primary and General treated group
l1[3] <- sum(dta.clean$treat.primary.general==1 & ! dta.clean$turnout_general %in% c(NA))

# Number of people who turned out among all treated
s1[4]<- sum(dta.clean$turnout_general[dta.clean$treat.primary.general==1 | dta.clean$treat.general==1]==T,na.rm=T)
# Total number of people in treated groups
l1[4] <- sum(dta.clean$treat.primary.general[dta.clean$treat.primary.general==1 & ! dta.clean$turnout_general %in% c(NA)]) + sum(dta.clean$treat.general[dta.clean$treat.general==1 & ! dta.clean$turnout_general %in% c(NA)])


## comparing turnout rates for:
# control, treated with General only postcards
prop.test(s1[1:2],l1[1:2])
# treated with General only postcards, treated with Primary and General postcards
prop.test(s1[2:3],l1[2:3])
# control, treated with Primary and General postcards
prop.test(s1[c(1,3)],l1[c(1,3)])
# control, all treated
prop.test(s1[c(1,4)],l1[c(1,4)])


#### generate coefficient, SE, p-value for comparison of treatments (in text)
lout.extra <- lm(turnout_general ~ control + treat.primary.general + as.factor(sex) +
               as.factor(party) + as.factor(ward) + 
               as.factor(vote_history) + as.factor(age_bin), data = dta.clean)
summary(lout.extra)


### simulate to estimate p-value for treatment comparison
### set number of simulations
M <- 500
rmat <- matrix(NA,M,2)
def <- 1-sum(c(c(mean(dta.clean$control),
                 mean(dta.clean$treat.primary.general))))
for(i in i:M){
  var <- t(rmultinom(dim(dta.clean)[1],size=1,prob=c(def,mean(dta.clean$control),
                                                 mean(dta.clean$treat.primary.general))))
  
  dta.clean$placebo1 <- (var[,2]==1)*1 
  dta.clean$placebo2 <-   (var[,3]==1)*1 
  
  lout.hold <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                    as.factor(party) + as.factor(ward) + 
                    as.factor(vote_history) + as.factor(age_bin), data = dta.clean)
  rmat[i,1] <- summary(lout.hold)$coef[2,4]
  rmat[i,2] <- summary(lout.hold)$coef[3,4]
}
rmat10 <- rmat

adj_Wstep(summary(lout.extra)$coef[c(2,3),4],t(rmat10))


##### estimate by turnout history, for Appendix Figure 7 -----

dtan <- list()

lout1 <- lm(turnout_general ~ treat.general+treat.neighbor + treat.primary.general  +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(vote_history) + as.factor(age_bin), data = dta.clean)

lout2 <- lm(turnout_general ~ treat.general +treat.neighbor+ treat.primary.general  +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(age_bin), data = dta.clean[dta.clean$vote_history=="General Primary Voter",])

dtan$GeneralPrimaryVoterloutMainAnalysis <- summary(lout2)

lout3 <- lm(turnout_general ~ treat.general +treat.neighbor + treat.primary.general +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(age_bin), data = dta.clean[dta.clean$vote_history=="General Voter",])

dtan$GeneralVoterloutMainAnalysis <- summary(lout3)

lout4 <- lm(turnout_general ~ treat.general +treat.neighbor + treat.primary.general +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(age_bin), data = dta.clean[dta.clean$vote_history=="Low Voter",])

dtan$LowVoterloutMainAnalysis <- summary(lout4)

lout5 <- lm(turnout_general ~ treat.general  +treat.neighbor+ treat.primary.general  +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(age_bin), data = dta.clean[dta.clean$vote_history=="Municipal Primary Voter",])

dtan$MunicipalPrimaryVoterloutMainAnalysis <- summary(lout5)

lout6 <- lm(turnout_general ~ treat.general +treat.neighbor+ treat.primary.general  +  as.factor(sex) +
              as.factor(party) + as.factor(ward) + 
              as.factor(age_bin), data = dta.clean[dta.clean$vote_history=="Municipal Voter",])

dtan$MunicipalVoterloutMainAnalysis <- summary(lout6)


p.vec <- c(summary(lout1)$coef[c(2,4),4],
           summary(lout2)$coef[c(2,4),4],
           summary(lout3)$coef[c(2,4),4],
           summary(lout4)$coef[c(2,4),4],
           summary(lout5)$coef[c(2,4),4],
           summary(lout6)$coef[c(2,4),4]
)
round(p.vec,digits=3)

M <- 500
rmat <- matrix(NA,M,12)
def <- 1-sum(c(c(mean(dta.clean$treat.general),
                 mean(dta.clean$treat.primary.general))))
for(i in 1:M){
  var <- t(rmultinom(dim(dta.clean)[1],size=1,prob=c(def,mean(dta.clean$treat.general),
                                                 mean(dta.clean$treat.primary.general))))
  
  dta.clean$placebo1 <- (var[,2]==1)*1 
  dta.clean$placebo2 <-   (var[,3]==1)*1 
  
  lout.hold <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                    as.factor(party) + as.factor(ward) + 
                    as.factor(vote_history) + as.factor(age_bin), data = dta.clean)
  rmat[i,1] <- summary(lout.hold)$coef[2,4]
  rmat[i,2] <- summary(lout.hold)$coef[3,4]
  lout.hold1 <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + as.factor(age_bin), 
                   data = dta.clean[dta.clean$vote_history=="General Primary Voter",])
  rmat[i,3] <- summary(lout.hold1)$coef[2,4]
  rmat[i,4] <- summary(lout.hold1)$coef[3,4]
  
  lout.hold2 <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + as.factor(age_bin), 
                   data = dta.clean[dta.clean$vote_history=="General Voter",])
  rmat[i,5] <- summary(lout.hold2)$coef[2,4]
  rmat[i,6] <- summary(lout.hold2)$coef[3,4]
  
  lout.hold3 <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + as.factor(age_bin), 
                   data = dta.clean[dta.clean$vote_history=="Low Voter",])
  rmat[i,7] <- summary(lout.hold3)$coef[2,4]
  rmat[i,8] <- summary(lout.hold3)$coef[3,4]
  
  lout.hold4 <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + as.factor(age_bin), 
                   data = dta.clean[dta.clean$vote_history=="Municipal Primary Voter",])
  rmat[i,9] <- summary(lout.hold4)$coef[2,4]
  rmat[i,10] <- summary(lout.hold4)$coef[3,4]
  
  lout.hold5 <- lm(turnout_general ~ placebo1+placebo2 + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + as.factor(age_bin), 
                   data = dta.clean[dta.clean$vote_history=="Municipal Voter",])
  rmat[i,11] <- summary(lout.hold5)$coef[2,4]
  rmat[i,12] <- summary(lout.hold5)$coef[3,4]
}
rmat21 <- rmat

### can load previously simulated p-values
adj.p <- adj_Wstep(p.vec,t(rmat21))

#save(adj.p,file="~/Dropbox/pbsi/voter turnout/08_Data/voter-history-pvalues-03092022.Rdata")
load("voter-history-pvalues-03092022.Rdata")

#load("~/Dropbox/pbsi/voter turnout/08_Data/voter-history-pvalues.Rdata")
#p.vec <- adj.p 

# Figure 7 - treatment effects among diff groups wrapped by vote histor
pdf("primary-exp-by-history-03092022.pdf",width=8,height=12)
par(mfcol=c(3,2))
barplot.func(dtan$LowVoterloutMainAnalysis,
             mntxt="Effects, Low-Turnout Voters",
             baseline=0.01,boost=.02,boost2=0.01,#boost=0.0075,
             adjustment=0.01713502-0.03309,p.vec=adj.p[7:8])
barplot.func(dtan$GeneralVoterloutMainAnalysis,
             mntxt="Effects, General Election Voters",
             baseline=0.15,boost=0.02,
             adjustment=0.1445787-0.240778,p.vec=adj.p[5:6])
barplot.func(dtan$GeneralPrimaryVoterloutMainAnalysis,
             mntxt="Effects, Primary Voters",
             baseline=0.15,boost=0.02,
             adjustment=0.2452983-dtan$GeneralPrimaryVoterloutMainAnalysis$coefficients[1,1],
             p.vec=adj.p[3:4])
barplot.func(dtan$MunicipalVoterloutMainAnalysis,
             mntxt="Effects, Municipal Voters",
             baseline=0.22,boost=0.03,
             adjustment=0.48242-dtan$MunicipalVoterloutMainAnalysis$coefficients[1,1],
             p.vec=adj.p[11:12])
barplot.func(dtan$MunicipalPrimaryVoterloutMainAnalysis,
             mntxt="Effects, Municipal Primary Voters",
             baseline=0.22,boost=0.03,boost2=-.02,
             boost3=.025,
             adjustment=0.6119832-dtan$MunicipalPrimaryVoterloutMainAnalysis$coefficients[1,1],
             p.vec=adj.p[9:10])
dev.off()

### Moderation analysis ----

## populations < 25th percentile Black  
dta.b25 <- dta.clean[dta.clean$CTPCNHBL15 < summary(dta.clean$CTPCNHBL15)[2] & ! dta.clean$CTPCNHBL15 %in% c(NA) & 
                   ! dta.clean$turnout_general %in% c(NA) & ! dta.clean$treat_framing %in% c(NA,""),]
dta.b25 <- dta.b25[! dta.b25$turnout_general %in% c(NA),]
loutBlack25 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                    as.factor(party) + as.factor(ward) + 
                    as.factor(vote_history) + as.factor(age_bin), data = dta.b25)
mean(dta.b25$turnout_general[dta.b25$treat %in% c("Extra","Control")],na.rm=T)
loutBlack25Summary <- summary(loutBlack25)

## populations >75th percentile Black
dta.b75 <- dta.clean[dta.clean$CTPCNHBL15 > summary(dta.clean$CTPCNHBL15)[5] & ! dta.clean$CTPCNHBL15 %in% c(NA) & 
                   ! dta.clean$turnout_general %in% c(NA) & ! dta.clean$treat_framing %in% c(NA,""),]
dta.b75 <- dta.b75[! dta.b75$turnout_general %in% c(NA),]

loutBlack75 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                    as.factor(party) + as.factor(ward) + 
                    as.factor(vote_history) + as.factor(age_bin), data = dta.b75)
loutBlack75Summary <- summary(loutBlack75)

mean(dta.b75$turnout_general[dta.b75$treat %in% c("Extra","Control")],na.rm=T)


# overall treatment effects, population <25th percentile Black, >75th percentile 

list_of_lm[["loutBlack25Summary"]] <- loutBlack25Summary
list_of_lm[["loutBlack75Summary"]] <- loutBlack75Summary

rm(dta.b25, dta.b75, loutBlack25, loutBlack75)

## by percentage of college graduates 
# columns: https://mdmb-mbweb.s3.amazonaws.com/media/courses/research-practicum/dcareaCensusVariableNames.pdf
# percentage of population that are college graduates
dta.clean$CTPCCOLL15 <- (dta.clean$SE_T025_005.15 + dta.clean$SE_T025_006.15 + 
                       dta.clean$SE_T025_007.15 + dta.clean$SE_T025_008.15)/dta.clean$SE_T014_001.15
# areas with fewer college grads than the 25th percentile 
dta.college25 <- dta.clean[dta.clean$CTPCCOLL15 < summary(dta.clean$CTPCCOLL15)[2] & ! 
                         dta.clean$CTPCCOLL15 %in% c(NA) & ! dta.clean$turnout_general %in% c(NA) 
                       & ! dta.clean$treat_framing %in% c(NA, ""),]
dta.college25 <- dta.college25[! dta.college25$turnout_general %in% c(NA),]

# regressions
loutCollege25 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                      as.factor(party) + as.factor(ward) + 
                      as.factor(vote_history) + as.factor(age_bin), data = dta.college25)
loutCollege25Summary <- summary(loutCollege25)



# areas with more college grads than the 75th percentile 
dta.college75 <- dta.clean[dta.clean$CTPCCOLL15 > summary(dta.clean$CTPCCOLL15)[5] & ! 
                         dta.clean$CTPCCOLL15 %in% c(NA) & ! dta.clean$turnout_general %in% c(NA) 
                       & ! dta.clean$treat_framing %in% c(NA, ""),]
dta.college75 <- dta.college75[! dta.college75$turnout_general %in% c(NA),]

# regressions
loutCollege75 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                      as.factor(party) + as.factor(ward) + 
                      as.factor(vote_history) + as.factor(age_bin), data = dta.college75)
loutCollege75Summary <- summary(loutCollege75)



list_of_lm[["loutCollege25Summary"]] <- loutCollege25Summary
list_of_lm[["loutCollege75Summary"]] <- loutCollege75Summary

rm(dta.college25, dta.college75, loutCollege25, loutCollege75)

## by mobility 
# percentage of population that are college graduates
# people with less mobility than the 25th percentile 
dta.mobility25 <- dta.clean %>% 
  filter(CTPCSAHO15 < quantile(dta.clean$CTPCSAHO15, c(.25), na.rm = TRUE))


# regressions
loutMobility25 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                       as.factor(party) + as.factor(ward) + 
                       as.factor(vote_history) + as.factor(age_bin), data = dta.mobility25)
loutMobility25Summary <- summary(loutMobility25)

# people with more mobility than the 75th percentile 
dta.mobility75 <- dta.clean %>% 
  filter(CTPCSAHO15 > quantile(dta.clean$CTPCSAHO15, c(.75), na.rm = TRUE))

# regressions
loutMobility75 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                       as.factor(party) + as.factor(ward) + 
                       as.factor(vote_history) + as.factor(age_bin), data = dta.mobility75)
loutMobility75Summary <- summary(loutMobility75)

# overall treatment effects, mobility <25th percentile, mobility >75th percentile
list_of_lm[["loutMobility25Summary"]] <- loutMobility25Summary
list_of_lm[["loutMobility75Summary"]] <- loutMobility75Summary
rm(dta.mobility25, dta.mobility75, loutMobility25, loutMobility75)




#### how many extra votes?
### primary plus
table(dta.clean$treat.primary.general)
# No. of people in Primary and General treatment group * treatment effect size
24073*  0.015073 = 362.8523 # new votes
# No. of people in Primary and General treatment group / no. of votes garnered
### One new vote per: 
24073/362.8523 = 66.3438 # targets


### just general
table(dta.clean$treat.general)
# No. of people in General only treatment group * treatment effect size
24029 * 0.008151 = 195.8604 # new votes
# No. of people in General only treatment group / no. of votes garnered
### One new vote per: 
24029/195.8604 = 122.6843 # targets


#### Primary election turnout effects-----
loutH2.primary <- lm(turnout_primary ~ I(treat %in% c("Primary | City","Primary | Neighbor")) + 
                       as.factor(sex) +
                       as.factor(party) + as.factor(ward) + 
                       as.factor(vote_history) + as.factor(age_bin), data = dta.clean,x=F)
summary(loutH2.primary)

# Appendix Table 2
texreg(loutH2.primary,digits=4,stars=0.05,single.row=T,longtable=T,dcolumn=T)


#### any treatment
loutH2.generic <- lm(turnout_general ~ I(! treat %in% c("Control","Extra")) + 
                      as.factor(sex) +
                     as.factor(party) + as.factor(ward) + 
                       as.factor(vote_history) + as.factor(age_bin), data = dta.clean,x=F)
summary(loutH2.generic)


## Original analysis among separate groups of respondents with
## various levels of voter history
original_lm_analysis_minus_history <- function(data, yvar){
  level <- trimws(data$vote_history[1])
  data <- data %>% 
    filter(!is.na(!!yvar))
  print(level)
  list_lm_return <- list()
  yvar <- as.symbol(yvar)
  lout_basic <- eval(bquote(lm( .(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                     as.factor(party) + as.factor(ward) + 
                     as.factor(age_bin), data = data)))
  list_lm_return[[paste0(level, "loutMainAnalysis")]] <- summary(lout_basic)
  rm(lout_basic)
  # Subsetting for the black quantiles
  dta.b25 <- data[data$CTPCNHBL15 < summary(data$CTPCNHBL15)[2] & ! data$CTPCNHBL15 %in% c(NA) & 
                    ! data$turnout_general %in% c(NA) & ! data$treat_framing %in% c(NA,""),]
  dta.b25 <- dta.b25[! dta.b25$turnout_general %in% c(NA),]
  loutBlack25 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                      as.factor(party) + as.factor(ward) + 
                      as.factor(age_bin), data = dta.b25)))
  list_lm_return[[paste0(level, "loutBlack25Summary")]] <- summary(loutBlack25)
  rm(dta.b25, loutBlack25)
  dta.b75 <- data[data$CTPCNHBL15 > summary(data$CTPCNHBL15)[5] & ! data$CTPCNHBL15 %in% c(NA) & 
                    ! data$turnout_general %in% c(NA) & ! data$treat_framing %in% c(NA,""),]
  dta.b75 <- dta.b75[! dta.b75$turnout_general %in% c(NA),]
  loutBlack75 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                      as.factor(party) + as.factor(ward) + 
                      as.factor(age_bin), data = dta.b75)))
  list_lm_return[[paste0(level, "loutBlack75Summary")]] <- summary(loutBlack75)
  rm(dta.b75, loutBlack75)
  #College Analysis
  data$CTPCCOLL15 <- (data$SE_T025_005.15 + data$SE_T025_006.15 + 
                        data$SE_T025_007.15 + data$SE_T025_008.15)/data$SE_T014_001.15
  data.college25 <- data[data$CTPCCOLL15 < summary(data$CTPCCOLL15)[2] & ! 
                           data$CTPCCOLL15 %in% c(NA) & ! data$turnout_general %in% c(NA) 
                         & ! data$treat_framing %in% c(NA, ""),]
  data.college25 <- data.college25[! data.college25$turnout_general %in% c(NA),]
  loutCollege25 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                        as.factor(party) + as.factor(ward) + 
                        as.factor(age_bin), data = data.college25)))
  list_lm_return[[paste0(level, "loutCollege25Summary")]] <- summary(loutCollege25)
  rm(data.college25, loutCollege25)
  data.college75 <- data[data$CTPCCOLL15 > summary(data$CTPCCOLL15)[5] & ! 
                           data$CTPCCOLL15 %in% c(NA) & ! data$turnout_general %in% c(NA) 
                         & ! data$treat_framing %in% c(NA, ""),]
  data.college75 <- data.college75[! data.college75$turnout_general %in% c(NA),]
  loutCollege75 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                        as.factor(party) + as.factor(ward) + 
                        as.factor(age_bin), data = data.college75)))
  list_lm_return[[paste0(level, "loutCollege75Summary")]] <- summary(loutCollege75)
  rm(data.college75, loutCollege75)
  ## by mobility 
  dta.mobility25 <- data %>% 
    filter(CTPCSAHO15 < quantile(data$CTPCSAHO15, c(.25), na.rm = TRUE))
  loutMobility25 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                         as.factor(party) + as.factor(ward) + 
                         as.factor(age_bin), data = dta.mobility25)))
  list_lm_return[[paste0(level, "loutMobility25Summary")]] <- summary(loutMobility25)
  rm(dta.mobility25, loutMobility25)
  dta.mobility75 <- data %>% 
    filter(CTPCSAHO15 > quantile(data$CTPCSAHO15, c(.75), na.rm = TRUE))
  loutMobility75 <- eval(bquote(lm(.(yvar) ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
                         as.factor(party) + as.factor(ward) + 
                         as.factor(age_bin), data = dta.mobility75)))
  list_lm_return[[paste0(level, "loutMobility75Summary")]] <- summary(loutMobility75)
  rm(dta.mobility75, loutMobility75)
  return(list_lm_return)
}
lm_by_voter_histories <- sapply(split(dta.clean, dta.clean$vote_history), original_lm_analysis_minus_history, yvar = "turnout_general")

## Do the same thing but for primary turnout
primary_lm_analysis_minus_history <- function(data, yvar, basic = FALSE){
  if (!basic){
    level <- trimws(data$vote_history[1])
  }
  else{
    level <- "Default"
  }
  data <- data %>% 
    filter(!is.na(!!yvar))
  print(level)
  list_lm_return <- list()
  yvar <- as.symbol(yvar)
  lout_basic <- eval(bquote(lm( .(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                  as.factor(party) + as.factor(ward) + 
                                  as.factor(age_bin), data = data)))
  list_lm_return[[paste0(level, "loutMainAnalysis")]] <- summary(lout_basic)
  rm(lout_basic)
  # Subsetting for the black quantiles
  dta.b25 <- data[data$CTPCNHBL15 < summary(data$CTPCNHBL15)[2] & ! data$CTPCNHBL15 %in% c(NA) & 
                    ! data$turnout_general %in% c(NA) & ! data$treat_framing %in% c(NA,""),]
  dta.b25 <- dta.b25[! dta.b25$turnout_general %in% c(NA),]
  loutBlack25 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                  as.factor(party) + as.factor(ward) + 
                                  as.factor(age_bin), data = dta.b25)))
  list_lm_return[[paste0(level, "loutBlack25Summary")]] <- summary(loutBlack25)
  rm(dta.b25, loutBlack25)
  dta.b75 <- data[data$CTPCNHBL15 > summary(data$CTPCNHBL15)[5] & ! data$CTPCNHBL15 %in% c(NA) & 
                    ! data$turnout_general %in% c(NA) & ! data$treat_framing %in% c(NA,""),]
  dta.b75 <- dta.b75[! dta.b75$turnout_general %in% c(NA),]
  loutBlack75 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                  as.factor(party) + as.factor(ward) + 
                                  as.factor(age_bin), data = dta.b75)))
  list_lm_return[[paste0(level, "loutBlack75Summary")]] <- summary(loutBlack75)
  rm(dta.b75, loutBlack75)
  #College Analysis
  data$CTPCCOLL15 <- (data$SE_T025_005.15 + data$SE_T025_006.15 + 
                        data$SE_T025_007.15 + data$SE_T025_008.15)/data$SE_T014_001.15
  data.college25 <- data[data$CTPCCOLL15 < summary(data$CTPCCOLL15)[2] & ! 
                           data$CTPCCOLL15 %in% c(NA) & ! data$turnout_general %in% c(NA) 
                         & ! data$treat_framing %in% c(NA, ""),]
  data.college25 <- data.college25[! data.college25$turnout_general %in% c(NA),]
  loutCollege25 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                    as.factor(party) + as.factor(ward) + 
                                    as.factor(age_bin), data = data.college25)))
  list_lm_return[[paste0(level, "loutCollege25Summary")]] <- summary(loutCollege25)
  rm(data.college25, loutCollege25)
  data.college75 <- data[data$CTPCCOLL15 > summary(data$CTPCCOLL15)[5] & ! 
                           data$CTPCCOLL15 %in% c(NA) & ! data$turnout_general %in% c(NA) 
                         & ! data$treat_framing %in% c(NA, ""),]
  data.college75 <- data.college75[! data.college75$turnout_general %in% c(NA),]
  loutCollege75 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                    as.factor(party) + as.factor(ward) + 
                                    as.factor(age_bin), data = data.college75)))
  list_lm_return[[paste0(level, "loutCollege75Summary")]] <- summary(loutCollege75)
  rm(data.college75, loutCollege75)
  ## by mobility 
  dta.mobility25 <- data %>% 
    filter(CTPCSAHO15 < quantile(data$CTPCSAHO15, c(.25), na.rm = TRUE))
  loutMobility25 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + treat.primary.general + as.factor(sex) +
                                     as.factor(party) + as.factor(ward) + 
                                     as.factor(age_bin), data = dta.mobility25)))
  list_lm_return[[paste0(level, "loutMobility25Summary")]] <- summary(loutMobility25)
  rm(dta.mobility25, loutMobility25)
  dta.mobility75 <- data %>% 
    filter(CTPCSAHO15 > quantile(data$CTPCSAHO15, c(.75), na.rm = TRUE))
  loutMobility75 <- eval(bquote(lm(.(yvar) ~ I(treat %in% c("Primary | City")) + I(treat %in% c("Primary | Neighbor")) + as.factor(sex) +
                                     as.factor(party) + as.factor(ward) + 
                                     as.factor(age_bin), data = dta.mobility75)))
  list_lm_return[[paste0(level, "loutMobility75Summary")]] <- summary(loutMobility75)
  rm(dta.mobility75, loutMobility75)
  return(list_lm_return)
}
list_of_lm_primary <- primary_lm_analysis_minus_history(dta.clean, "turnout_primary", basic = TRUE)
lm_by_voter_histories_primary <- sapply(split(dta.clean, dta.clean$vote_history), primary_lm_analysis_minus_history, yvar = "turnout_primary")


# The lms for the general election 
output <-  c(list_of_lm, lm_by_voter_histories)
name_lm_generate <- function(level, lms){
  returned_names <-  paste0(gsub(" ", "", level), lms)
  return(returned_names)
}
basic_names <- paste0("Basic", names(output)[1:7])
lm_names <- names(output)[1:7]
new_names <- basic_names
for(a in levels(as.factor(dta.clean$vote_history))){
  new_names <- append(new_names, name_lm_generate(a, lm_names))
}
names(output) <- new_names
#saveRDS(output, file = "list-linear-models.rds") ### Save the output to a rdata file

# The lms for the primary
output_primary <- c(list_of_lm_primary, lm_by_voter_histories_primary)

basic_names_primary <- paste0("Basic", names(output_primary)[1:7])
lm_names_primary <- names(output_primary)[1:7]
new_names_primary <- basic_names_primary
for(a in levels(as.factor(dta.clean$vote_history))){
  new_names_primary <- append(new_names_primary, name_lm_generate(a, lm_names_primary))
}
names(output_primary) <- new_names_primary
#saveRDS(output_primary, file = "list-linear-models-primary-only.rds") ### Save the output to a rdata file


#### Producing Figure 1 and Appendix Figure 8 based on regression results from above ####

### primary election 2019 regression results
#dtap <- readRDS("list-linear-models-primary-only.rds")

### general election 2019 regression results
#dta <- readRDS("list-linear-models.rds")

#####
loutH3 <- lm(turnout_general ~ treat.general + treat.neighbor + treat.primary.general + as.factor(sex) +
               as.factor(party) + as.factor(ward) + 
               as.factor(vote_history) + as.factor(age_bin), data = dta.clean)


BasicloutMainAnalysis <- summary(loutH3)

adjustment <- 0.2887078-BasicloutMainAnalysis$coefficients[1,1]

M <- 10000
betas <- mvrnorm(M,mu=BasicloutMainAnalysis$coefficients[,1],Sigma=vcov(BasicloutMainAnalysis))
xpg <- xg <- xc <- rep(0,dim(betas)[2])

xpg[1] <- xg[1] <- xc[1] <- 1
xg[2] <- 1
xpg[4] <- 1

pred.xpg <- (xpg)%*%t(betas)+adjustment
pred.xg <- xg%*%t(betas)+adjustment
pred.xc <- xc%*%t(betas)+adjustment

mn <- quantile(c(pred.xpg,pred.xg,pred.xc),.01)
mx <- quantile(c(pred.xpg,pred.xg,pred.xc),.99)+.10


#####

mx <- quantile(c(pred.xpg,pred.xg,pred.xc),.99)+.165

# Figure 1
pdf("primary-exp-overall-pooled.pdf",width=6)

bp <- barplot(c(BasicloutMainAnalysis$coefficients[1,1]+adjustment,
                
                BasicloutMainAnalysis$coefficients[1,1]+BasicloutMainAnalysis$coefficients[2,1]+adjustment,
                BasicloutMainAnalysis$coefficients[1,1]+BasicloutMainAnalysis$coefficients[4,1]+adjustment),
              names.arg=c("Control","General","Primary & General"),
              ylim=c(0,mx),
              main="Effects, All Registrants")

lines(x=c(bp[1,1],bp[1,1]),
      y=c(quantile(pred.xc,.025),quantile(pred.xc,.975)),
      lwd=2)

lines(x=c(bp[2,1],bp[2,1]),
      y=c(quantile(pred.xg,.025),quantile(pred.xg,.975)),
      lwd=2)

lines(x=c(bp[3,1],bp[3,1]),
      y=c(quantile(pred.xpg,.025),quantile(pred.xpg,.975)),
      lwd=2)

text(x=bp[1,1],y=.15,
     paste("Mean=",round(BasicloutMainAnalysis$coefficients[1,1]+adjustment,digits=3),sep="")
)
text(x=bp[2,1],y=.15,
     paste("Mean=",round(BasicloutMainAnalysis$coefficients[1,1]+BasicloutMainAnalysis$coefficients[2,1]+adjustment,digits=3),
           sep="")
)
text(x=bp[3,1],y=.15,
     paste("Mean=",round(BasicloutMainAnalysis$coefficients[1,1]+BasicloutMainAnalysis$coefficients[4,1]+adjustment,digits=3),
           sep="")
)



brackets(x1=bp[1,1],x2=bp[2,1],y1=(mx-.16),y2=(mx-0.16),h=0.01)
text(x=mean(c(bp[1,1],bp[2,1])),y=mx-.13,paste("Treatment=",
                                               round(BasicloutMainAnalysis$coefficients[2,1],digits=3),
                                               ", \n P-value=",
                                               round(BasicloutMainAnalysis$coefficients[2,4],digits=3),
                                               sep=""))


brackets(x1=bp[2,1],x2=bp[3,1],y1=(mx-.1125),y2=(mx-0.1125),h=0.01)
text(x=mean(c(bp[2,1],bp[3,1])),y=mx-0.08,paste("Treatment=",
                                                round(BasicloutMainAnalysis$coefficients[4,1]-BasicloutMainAnalysis$coefficients[2,1],
                                                      digits=3),
                                                ", \n P-value = 0.05",
                                                sep=""))


brackets(x1=bp[1,1],x2=bp[3,1],y1=(mx-.0525),y2=(mx-0.0525),h=0.01)
text(x=mean(c(bp[1,1],bp[3,1])),y=mx-0.0225,paste("Treatment=",
                                                  round(BasicloutMainAnalysis$coefficients[4,1],digits=3),
                                                  ", \n P-value < 0.001",
                                                  sep=""))

dev.off()



####### plots for geographic subgroups, Appendix figure 8

barplot.func2 <- function(x=output$BasicloutMainAnalysis,
                          mntxt="Effects, All Voters",
                          baseline=0.13,
                          boost=0.02,
                          boost2=0,
                          boost3=0,
                          adjustment=0,
                          p.vec){
  
  M <- 10000
  betas <- mvrnorm(M,mu=x$coefficients[,1],Sigma=vcov(x))
  xpg <- xg <- xc <- rep(0,dim(betas)[2])
  
  xpg[1] <- xg[1] <- xc[1] <- 1
  xg[2] <- 1
  xpg[4] <- 1
  
  pred.xpg <- (xpg)%*%t(betas)+adjustment
  pred.xg <- xg%*%t(betas)+adjustment
  pred.xc <- xc%*%t(betas)+adjustment
  
  mn <- quantile(c(pred.xpg,pred.xg,pred.xc),.01)
  mx <- quantile(c(pred.xpg,pred.xg,pred.xc),.99)+.10
  #####
  
  bp <- barplot(c(x$coefficients[1,1]+adjustment,
                  x$coefficients[1,1]+x$coefficients[2,1]+adjustment,
                  x$coefficients[1,1]+x$coefficients[4,1]+adjustment),
                names.arg=c("Control","General","Primary & General"),
                ylim=c(0,mx),
                main=mntxt)
  
  lines(x=c(bp[1,1],bp[1,1]),
        y=c(quantile(pred.xc,.025),quantile(pred.xc,.975)),
        lwd=2)
  
  lines(x=c(bp[2,1],bp[2,1]),
        y=c(quantile(pred.xg,.025),quantile(pred.xg,.975)),
        lwd=2)
  
  lines(x=c(bp[3,1],bp[3,1]),
        y=c(quantile(pred.xpg,.025),quantile(pred.xpg,.975)),
        lwd=2)
  
  text(x=bp[1,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+adjustment,digits=3),sep="")
  )
  text(x=bp[2,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+x$coefficients[2,1]+adjustment,digits=3),
             sep="")
  )
  text(x=bp[3,1],y=baseline+boost+adjustment,
       paste("Mean=",round(x$coefficients[1,1]+x$coefficients[4,1]+adjustment,digits=3),
             sep="")
  )
  
  brackets(x1=bp[1,1],x2=bp[2,1],y1=(mx-.075-boost3),y2=(mx-0.075-boost3),h=0.01)
  text(x=mean(c(bp[1,1],bp[2,1])),y=mx-.055,paste("Treatment=",
                                                  round(x$coefficients[2,1],digits=3),
                                                  ", P=",round(p.vec[1],digits=2),
                                                  sep=" "))
  
  brackets(x1=bp[1,1],x2=bp[3,1],y1=(mx-.035),y2=(mx-0.035),h=0.01)
  text(x=mean(c(bp[1,1],bp[3,1])),y=mx-0.015,paste("Treatment=",
                                                   round(x$coefficients[4,1],digits=3),
                                                   ", P=",round(p.vec[2],digits=2),
                                                   sep=" "))
  
}

pdf("primary-exp-by-neighborhood.pdf",width=8,height=8)
par(mfcol=c(2,2))

barplot.func2(output$BasicloutCollege25Summary,
              mntxt="Effects, Low Percent BAs",
              baseline=0.10,boost=0.03,
              adjustment=0.2142219-output$BasicloutCollege25Summary$coefficients[1,1],
              p.vec=output$BasicloutCollege25Summary$coefficients[c(2,4),4])

barplot.func2(output$BasicloutCollege75Summary,
              mntxt="Effects, High Percent BAs",
              baseline=0.10,boost=0.04,
              adjustment=0.3523896-output$BasicloutCollege25Summary$coefficients[1,1],
              p.vec=output$BasicloutCollege75Summary$coefficients[c(2,4),4])

barplot.func2(output$BasicloutMobility25Summary,
              mntxt="Effects, Low Mobility",
              baseline=0.15,boost=0.04,
              adjustment=0.2616633-output$BasicloutMobility25Summary$coefficients[1,1],
              p.vec=output$BasicloutMobility25Summary$coefficients[c(2,4),4])

barplot.func2(output$BasicloutBlack75Summary,
              mntxt="Effects, Heavily Black Neighborhoods",
              baseline=0.10,boost=0.05,
              adjustment=0.3018589-output$BasicloutBlack75Summary$coefficients[1,1],
              p.vec=output$BasicloutBlack75Summary$coefficients[c(2,4),4])

dev.off()
